** Make a table of pre-treatment means for main outcome variables
* wic redemptions by ZIP in 100Ks
* Mean level of WIC authorization by store type

*last modified: 8 January 2025
*last modified by: Charlotte Ambrozek
* Reset the session so the frames created below start from a clean slate
clear all

* Directory macros: cleaned inputs and the destination for LaTeX tables.
* NOTE(review): `in' is not referenced in the code visible in this file; the
* input paths further down are written out literally.
local in ./data/cleaned
local out ./analysis/output/tables

* Frame ewic_fy: eWIC rollout data. The year variable is renamed to fiscalyear,
* which is the key name used when other frames link to this one.
frame create ewic_fy
frame ewic_fy {
	use "./data/cleaned/ewic_rollout.dta", clear
	* discard every variable whose name starts with pre or post
	drop pre* post*
	rename year fiscalyear
}

* Frame red_fy: cleaned WIC redemptions (file tip_wic_redemptions_05_18).
* NOTE(review): an earlier version of this comment described two redemptions
* datasets, the second balanced to ZIPs with non-missing redemptions in all
* years; only this single file is loaded here.
frame create red_fy
frame red_fy {
	use "./data/cleaned/tip_wic_redemptions_05_18", clear
	rename wic_redemptions wicred
	* redemptions expressed in units of 100,000
	generate wicredhk = wicred / 100000
}

* Frame tip_fy: TIP store data (one frame of its own)
frame create tip_fy
frame tip_fy {
	local data_dir ./data/cleaned
	use `data_dir'/tip_auth_sq, clear

	* Remove "Direct Distribution Center" and "Home Food Delivery Contractor" stores
	* (presumably vendor type codes 3 and 4 -- confirm against the codebook).
	* excl_row marks rows whose (vendor_type1, vendor_type2) pair is
	* (3, 4-or-missing) or (4, 3-or-missing); excl_modal is the within-store
	* mode of that marker.
	generate excl_row = (vendor_type1 == 3 & (vendor_type2 == 4 | missing(vendor_type2))) ///
		| (vendor_type1 == 4 & (vendor_type2 == 3 | missing(vendor_type2)))
	bysort tip_id: egen excl_modal = mode(excl_row)
	* Stores that are mostly NOT of the excluded types: overwrite the type on the
	* marked rows with the value from the following row of the same store.
	* NOTE(review): no within-store sort order is specified here, so "following
	* row" depends on the order of the data as loaded -- confirm this is intended.
	bysort tip_id: replace vendor_type1 = vendor_type1[_n+1] if excl_modal == 0 & excl_row == 1
	* Stores that are mostly of the excluded types are removed entirely
	drop if excl_modal == 1
	drop excl_modal excl_row tip_year_id

	* Derive the state FIPS code from the county FIPS: convert to string,
	* left-pad 4-character codes with a zero, and take the first two characters
	generate str_fips = string(ct_fips)
	replace str_fips = "0" + str_fips if strlen(str_fips) == 4
	generate st_fips = substr(str_fips, 1, 2)
	destring st_fips, replace

	* Drop Vermont and Mississippi because of different regimes prior to eWIC,
	* NV because eWIC turned on, then off, then on again and the timings are not
	* reliable, and any rows with missing st_fips
	drop if inlist(st_fips, 50, 32, 28) | missing(st_fips)

	* Flag stores that specialize in WIC (either vendor type equal to 1 or 7)
	generate a50 = inlist(vendor_type1, 1, 7) | inlist(vendor_type2, 1, 7)

	* Bring in the eWIC implementation year, matched on county and fiscal year
	frlink m:1 ct_fips fiscalyear, frame(ewic_fy) generate(ewic_link)
	frget ev_year, from(ewic_link)

	* Assume a store is treated at its earliest exposure: use the minimum
	* ev_year observed for the store on all of its rows, then verify that
	* ev_year is constant within store
	bysort tip_id: egen ey_min = min(ev_year)
	replace ev_year = ey_min if ev_year != ey_min & !missing(ey_min)
	bysort tip_id (ev_year): assert ev_year[1] == ev_year[_N]

	* Confirm there are no duplicates at the level of the panel declared next
	duplicates report tip_id fiscalyear
	assert r(N) == r(unique_value)
	xtset tip_id fiscalyear

	* This ZIP has multiple implementation dates that can't be reconciled
	drop if zip == 42223

	* Copy the fields needed to build a frame of unique ZIP/year observations
	* with eWIC implementation info; later this is merged back and collapsed
	* to get aggregated ZIP-level outcomes
	compress
	frame put zip fiscalyear ev_year state st_fips, into(zip_sq_frame)
}

* Frame zip_sq_frame: reduce the store-level rows copied from tip_fy to one
* row per ZIP x fiscal year carrying a single eWIC event year per ZIP.
frame zip_sq_frame{
	duplicates drop 
	
	* State identifiers must be constant within a ZIP. Sorting on the checked
	* variable inside each ZIP makes first == last equivalent to "all equal";
	* without the sort only two arbitrary rows would be compared.
	bys zip (state): assert state[1] == state[_N]
	bys zip (st_fips): assert st_fips[1] == st_fips[_N]
	*resolve discrepancies in values of ewic implementation variables as follows:
	*ev_year is equal to the min of ev_year across the zip/fiscal year (use the earliest possible event year)
	bys zip: egen ey_min = min(ev_year)
	* assume treated at earliest exposure
	replace ev_year = ey_min if ev_year != ey_min & !missing(ey_min)
	bys zip (ev_year): assert ev_year[1] == ev_year[_N]

	* rows that differed only in ev_year are exact duplicates after the replace
	duplicates drop

	* the frame must now be unique on zip x fiscalyear (required by the 1:1 link later)
	duplicates report zip fiscalyear 
	assert r(N) == r(unique_value)	
	compress
}

* Row 1 of the summary matrix: pre-treatment mean and SD of authorization
* across all store-years
frame change tip_fy
* Keep store-years strictly before the eWIC event year. A missing ev_year
* compares as larger than any fiscalyear, so those rows are kept as well.
keep if fiscalyear < ev_year
* Copy the two variables needed for the by-chain rows into their own frame
frame put auth chain, into(chainmeans)
summarize auth
local auth_mean = round(r(mean), 0.01)
local auth_sd = round(r(sd), 0.01)
matrix input pretmtmeanstab = (`auth_mean', `auth_sd')
*collapse (mean) authmean = auth (sd) sdauth = auth

* Rows 2 and 3 of the summary matrix: pre-treatment authorization for
* chain stores (chain == 1) first, then non-chain stores (chain == 0)
frame change chainmeans
foreach is_chain of numlist 1 0 {
	summarize auth if chain == `is_chain'
	local grp_mean = round(r(mean), 0.01)
	local grp_sd = round(r(sd), 0.01)
	matrix pretmtmeanstab = (pretmtmeanstab \ `grp_mean', `grp_sd')
}


* Row 4 of the summary matrix: pre-treatment mean and SD of WIC redemptions
* (in 100Ks) across ZIP-years
cwf red_fy 
frlink 1:1 zip fiscalyear, frame(zip_sq_frame) generate(zip_frame_link)
frget ev_year, from(zip_frame_link)
* Drop ZIP-years with no match in zip_sq_frame. Unmatched rows get a missing
* ev_year, and because missing compares as larger than any number they would
* otherwise pass the pre-treatment filter below. That would bring back the
* states and the ZIP that were deliberately excluded when tip_fy was built.
keep if !missing(zip_frame_link)
* keep only if pre-treatment (matched ZIP-years whose ev_year is missing are retained)
keep if fiscalyear < ev_year
*collapse (mean) meeanredhk = wicredhk (sd) sdredhk = wicredhk

summ wicredhk
local mean = round(r(mean), 0.01)
local sd = round(r(sd), 0.01)
matrix pretmtmeanstab = (pretmtmeanstab \ `mean', `sd')

* Label the summary matrix: one row per outcome, columns are mean and SD
matrix rownames pretmtmeanstab = Authorization  ChainAuthorization IndepAuthorization RedemptionsHK
matrix colnames pretmtmeanstab = Mean SD

*collapse (mean) meanauth = auth (sd) sdauth = auth, by(chain)

* Write the matrix as a LaTeX table. replace lets the script be re-run when
* the output file already exists instead of stopping with an error.
esttab matrix(pretmtmeanstab) using `out'/pretmtmeans.tex, replace width(0.8\textwidth) label ///
	title(Pre-treatment summary of outcome variables\label{tab:pretmtmeans}) ///
	addnote("Sample restricted to store-years (authorization outcome) or ZIP-years (redemption outcome) prior to WIC EBT implementation.")


